

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# DISCLAIMER AND GENERAL INFORMATION
#
# File: 0_cbam_prep.R
# Purpose: Data preparation
# Date: 10 July 2024
# Data: Survey data from "./data.rds"
#
# Technical disclaimer:
# All analyses in R version 4.4.1 (2024-06-14 ucrt) -- "Race for Your Life"
# R Studio 2024.04.2 Build 764 ("Chocolate Cosmos" Release (e4392fc9, 2024-06-05) for Windows)
# Windows 10 Enterprise, 64-bit
# 12th Gen Intel(R) Core(TM) i7-1255U 1.70 GHz with 16GB RAM
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (A) Load data and packages ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

library(tidyverse)
library(haven)

# Read the survey data (serialized R object) into `dta`; the path is relative
# to the current working directory.
dta <- readRDS(file = "./data.rds")

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (B) Create DVs and key treatment variables ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Outcome variables (DVs).
# The code 9999 is recoded to NA for the support and impact items
# (presumably a non-substantive answer such as "don't know" -- TODO confirm
# against the codebook). Missing raw values stay NA.
dta <- dta %>%
  mutate(
    # DV: support outcome measure
    cbam_support = ifelse(co2_tax_support != 9999, co2_tax_support, NA),

    # DV: climate-trade trade-off (binary variable):
    # values below 3 -> 0, values above 3 -> 1, the midpoint 3 and NA -> NA.
    # NOTE(review): unlike the other items, `tradeoff` is not screened for
    # 9999 here; if the raw item can contain that code it would be coded 1.
    tradeoff_bin = case_when(
      tradeoff < 3 ~ 0,
      tradeoff > 3 ~ 1
    ),

    # DV: effect outcome measures (personal, regional, national)
    cbam_person  = ifelse(co2_tax_person != 9999, co2_tax_person, NA),
    cbam_region  = ifelse(co2_tax_region != 9999, co2_tax_region, NA),
    cbam_country = ifelse(co2_tax_country != 9999, co2_tax_country, NA),

    # Inverted impact measures (value v becomes 6 - v), so that bigger values
    # indicate more positive impacts
    cbam_person_inv  = 6 - cbam_person,
    cbam_region_inv  = 6 - cbam_region,
    cbam_country_inv = 6 - cbam_country
  )


# Treatment indicator: numeric arm code 1-15 taken from `experiment2`
dta$tr <- as.numeric(dta$experiment2)

# Group indicator for the 12 experimental groups.
# Position i of the lookup vector holds the group of arm i; the arm pairs
# 4/5, 9/10 and 14/15 are each merged into a single group. Arm codes that
# are missing or not exactly 1-15 yield NA via match().
arm_to_group <- c(1, 2, 3, 4, 4,
                  5, 6, 7, 8, 8,
                  9, 10, 11, 12, 12)
dta$exp_group <- arm_to_group[match(dta$tr, seq_along(arm_to_group))]

# Labelled factor version of the group indicator
group_labels <- c(
  "Control", "Jobs", "Prices", "Both",
  "Trade frame: Control", "Trade frame: Jobs",
  "Trade frame: Prices", "Trade frame: Both",
  "Climate frame: Control", "Climate frame: Jobs",
  "Climate frame: Prices", "Climate frame: Both"
)
dta$exp_group_fct <- factor(dta$exp_group,
                            levels = 1:12,
                            labels = group_labels)


# Collapsed treatment identifiers, derived from `exp_group` (1-12) and the
# arm code `tr` (1-15) created above. `tr` is not recomputed here: the
# earlier assignment from `experiment2` is still valid at this point.
dta <- dta %>%
  mutate(
    # Main treatment IDs (collapsed frames):
    # 1 = Control, 2 = Jobs, 3 = Prices, 4 = Both (every remaining group).
    # A missing group code stays NA instead of falling into category 4.
    tr_main = case_when(
      is.na(exp_group) ~ NA_real_,
      exp_group %in% c(1, 5, 9) ~ 1,
      exp_group %in% c(2, 6, 10) ~ 2,
      exp_group %in% c(3, 7, 11) ~ 3,
      TRUE ~ 4
    ),

    # Main frame IDs (collapsed vignettes):
    # 1 = groups 1-4 (no frame prefix in the labels), 2 = trade frame
    # (groups 5-8), 3 = climate frame (every remaining group).
    tr_frame = case_when(
      is.na(exp_group) ~ NA_real_,
      exp_group %in% 1:4 ~ 1,
      exp_group %in% 5:8 ~ 2,
      TRUE ~ 3
    ),

    # ID for treatments with potential order effects: the two arms merged
    # into each "Both" group are told apart (arms 4/9/14 -> 1,
    # arms 5/10/15 -> 2); all other arms and missing codes -> NA.
    orderID = case_when(
      tr %in% c(4, 9, 14) ~ 1,
      tr %in% c(5, 10, 15) ~ 2
    )
  )


# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (C) Create main covariates ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Socio-demographic covariates. Missing raw values propagate to NA in every
# derived variable.
dta <- dta %>%
  mutate(
    # Gender: 1 if gender code is 2, 0 for any other non-missing code
    female = as.numeric(gender == 2),

    # Age: six brackets (<25, 25-34, 35-44, 45-54, 55-64, 65+).
    # Conditions are checked in order, so each line only needs the upper
    # bound of its bracket.
    age_cat = case_when(
      age < 25 ~ 1,
      age < 35 ~ 2,
      age < 45 ~ 3,
      age < 55 ~ 4,
      age < 65 ~ 5,
      age >= 65 ~ 6
    ),

    # Income: four brackets on the raw income code; codes above 13 -> NA
    income_cat = case_when(
      income <= 7 ~ 1,
      income <= 9 ~ 2,
      income <= 11 ~ 3,
      income <= 13 ~ 4
    ),

    # Education: taken over unchanged from the raw categorical variable
    edu_cat = highest_edu_cat,

    # Employment: sector code 7 is set to NA; dummies flag code 6 (retired)
    # and code 4 (`wowork`, presumably "without work" -- TODO confirm).
    employ_cat = ifelse(sector != 7, sector, NA),
    retired = as.numeric(employ_cat == 6),
    wowork = as.numeric(employ_cat == 4)
  )

# Attitudinal covariates.
dta <- dta %>%
  mutate(
    # Political interest: the code 9998 is set to NA; the raw column is
    # overwritten in place.
    # NOTE(review): the other items in this script use 9999 as the code to
    # drop -- confirm that 9998 is the intended code for this item.
    pol_interest = ifelse(pol_interest != 9998, pol_interest, NA),

    # Climate concern: the code 9999 is set to NA
    cc_concern = ifelse(climate != 9999, climate, NA),

    # Ideology: 1 = values below 5, 2 = values 5 or 6, 3 = values above 6;
    # anything else (e.g. missing) -> NA.
    # NOTE(review): `leftright` is not screened for a 9999-type code here;
    # if the raw item contains one it would land in category 3 -- confirm.
    lr_cat = case_when(
      leftright < 5 ~ 1,
      leftright == 5 | leftright == 6 ~ 2,
      leftright > 6 ~ 3
    )
  )


# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (D) Split data by attention check ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


# Response==2 is the correct answer
table(dta$attention_exp2, dta$country) # raw data

# Raw data in percentages: one row per country and observed response.
#   count = respondents giving that response in the country
#   n     = all respondents in the country (including missing responses)
#   freq  = count / n
# count() is used instead of a multi-row summarise() (deprecated since
# dplyr 1.1.0) and keeps the response value as its own column.
freq.tab <- dta %>%
  count(country, attention_exp2, name = "count") %>%
  group_by(country) %>%
  mutate(n = sum(count), freq = count / n) %>%
  ungroup() %>%
  filter(!is.na(attention_exp2)) # missing responses count towards n only
freq.tab


# Drop respondents who do not pass the attention check
dta_full <- dta # full data set with all respondents

# which() discards NA comparisons. Indexing with the raw logical vector
# would insert an all-NA row for every respondent whose attention-check
# answer is missing.
dta <- dta[which(dta$attention_exp2 == 2), , drop = FALSE] # trimmed data set


# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#                         END OF FILE
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


